This document describes how to go from raw vcftools output of diversity metrics (Fst, pi, and Tajima’s D) to Manhattan plots, including making figures for the manuscript associated with this repo. This script is a result of brute forcing things to work: I am certainly no expert, and thus lots of the notes refer to my own dumb mistakes.

library(tidyverse)
library(qqman)
library(scales)

Prepare files: join vcftools output into a single table.

First, read in raw output files from vcftools (in this repo, see filter-scan.sh for code to generate).

fst.UKUS.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_UKUS_50kb.windowed.weir.fst",sep="\t"))
fst.AUUK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_AUUK_50kb.windowed.weir.fst",sep="\t"))
fst.USAU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseud_nostep_AUUS_50kb.windowed.weir.fst",sep="\t"))
pi.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_UK_pi_50kb.windowed.pi",sep="\t"))
pi.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_AU_pi_50kb.windowed.pi",sep="\t"))
pi.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_US_pi_50kb.windowed.pi",sep="\t"))
TajD.UK.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_UK_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.AU.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_AU_TajimaD_50kb.Tajima.D",sep="\t"))
TajD.US.50kb <- as_tibble(read.csv("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/input/EUSTreseq_pseudochrom_US_TajimaD_50kb.Tajima.D",sep="\t"))

vcftools outputs a few identifies for position: “CHROM,” “BIN_START” and “BIN_END” for .fst and .pi files, but only “CHROM” AND “BIN_START” for .Tajima.D files. Unfortunately, the numbering is off, so we’ll add 1 to every “BIN_START” in the .Tajima.D files.

head(TajD.AU.50kb)
## # A tibble: 6 x 4
##   CHROM BIN_START N_SNPS TajimaD
##   <fct>     <int>  <int>   <dbl>
## 1 10            0    147  0.583 
## 2 10        50000    334  0.372 
## 3 10       100000    301  0.836 
## 4 10       150000     98  1.56  
## 5 10       200000    102 -0.557 
## 6 10       250000    139 -0.0356
head(fst.AUUK.50kb)
## # A tibble: 6 x 6
##   CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST  MEAN_FST
##   <fct>     <int>   <int>      <int>        <dbl>     <dbl>
## 1 10            1   50000        164      0.00792  0.0105  
## 2 10        50001  100000        379      0.0595   0.0500  
## 3 10       100001  150000        311      0.00728 -0.000459
## 4 10       150001  200000        120      0.0162   0.0275  
## 5 10       200001  250000        121      0.126    0.0926  
## 6 10       250001  300000        155      0.109    0.0747
TajD.UK.50kb$BIN_START <- TajD.UK.50kb$BIN_START + 1
TajD.AU.50kb$BIN_START <- TajD.AU.50kb$BIN_START + 1
TajD.US.50kb$BIN_START <- TajD.US.50kb$BIN_START + 1

Now BIN_START should match. To use dplyr, we’ll need a column that specifes a unique position in the genome. Create a new column that joins “CHROM” and “BIN_START” so that we match each value based on actual position in the genome.

fst.UKUS.50kb$POS_ID <- paste(fst.UKUS.50kb$CHROM,fst.UKUS.50kb$BIN_START,sep="-")
fst.AUUK.50kb$POS_ID <- paste(fst.AUUK.50kb$CHROM,fst.AUUK.50kb$BIN_START,sep="-")
fst.USAU.50kb$POS_ID <- paste(fst.USAU.50kb$CHROM,fst.USAU.50kb$BIN_START,sep="-")
pi.UK.50kb$POS_ID <- paste(pi.UK.50kb$CHROM,pi.UK.50kb$BIN_START,sep="-")
pi.AU.50kb$POS_ID <- paste(pi.AU.50kb$CHROM,pi.AU.50kb$BIN_START,sep="-")
pi.US.50kb$POS_ID <- paste(pi.US.50kb$CHROM,pi.US.50kb$BIN_START,sep="-")
TajD.UK.50kb$POS_ID <- paste(TajD.UK.50kb$CHROM,TajD.UK.50kb$BIN_START,sep="-")
TajD.AU.50kb$POS_ID <- paste(TajD.AU.50kb$CHROM,TajD.AU.50kb$BIN_START,sep="-")
TajD.US.50kb$POS_ID <- paste(TajD.US.50kb$CHROM,TajD.US.50kb$BIN_START,sep="-")

Now drop column names that we no longer need, otherwise we’ll have a giant table after all the merging.

fst.AUUK.50kb <- fst.AUUK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
fst.USAU.50kb <- fst.USAU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.UK.50kb <- pi.UK.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.AU.50kb <- pi.AU.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
pi.US.50kb <- pi.US.50kb %>% select(-CHROM, -BIN_START, -BIN_END, -N_VARIANTS)
TajD.UK.50kb <- TajD.UK.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.AU.50kb <- TajD.AU.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)
TajD.US.50kb <- TajD.US.50kb %>% select(-CHROM, -BIN_START, -N_SNPS)

We’ll join tables based on the unique position identified (POS_ID). The rename() command ensures that each new column header specifies population (new name first). Heads up: each time you join two tables, those two tables are no longer accessible except as a joined table!

fstUKUS.fstAUUK.50kb <- left_join(fst.UKUS.50kb,fst.AUUK.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UKUS","_AUUK"))
fst.50kb <- left_join(fstUKUS.fstAUUK.50kb,fst.USAU.50kb, by = "POS_ID", copy = FALSE, suffix=c("","_USAU"))
fst.50kb <- rename(fst.50kb, WEIGHTED_FST_USAU = WEIGHTED_FST)
fst.50kb <- rename(fst.50kb, MEAN_FST_USAU = MEAN_FST)
fst.50kb.piUK <- left_join(fst.50kb,pi.UK.50kb, by = "POS_ID", copy = FALSE)
fst.50kb.piUK.piUS <- left_join(fst.50kb.piUK,pi.US.50kb, by = "POS_ID", copy = FALSE,suffix=c("_UK","_US"))
fst.pi.50kb <- left_join(fst.50kb.piUK.piUS,pi.AU.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb <- rename(fst.pi.50kb, PI_AU = PI)
fst.pi.50kb.TajDUK <- left_join(fst.pi.50kb,TajD.UK.50kb, by = "POS_ID", copy = FALSE)
fst.pi.50kb.TajDUK.TajDUS <- left_join(fst.pi.50kb.TajDUK,TajD.US.50kb, by = "POS_ID", copy = FALSE, suffix=c("_UK","_US"))
div <- left_join(fst.pi.50kb.TajDUK.TajDUS,TajD.AU.50kb, by = "POS_ID", copy = FALSE)
div <- rename(div, TajimaD_AU = TajimaD)

We’re going to drop data from: * small scaffolds (which conveniently start with ‘KQ’ or ‘LNCF’, * any rows (positions) with missing data (NA), * and also coerce “negative” FST or pi to zero. We replace POS_ID (which was converted to numeric) with SNP. For the qqman package, chromosomes need to be a numeric value, so we also use lapply() below to rename chromosomes.

div <- filter(div, !grepl('KQ',CHROM))
div <- filter(div, !grepl('LNCF',CHROM))
div <- filter(div, !grepl('Unknown',CHROM))
div <- div %>% drop_na()
div[,c(5:6)][div[,c(5:6)] < 0] <- 0
div[,c(8:14)][div[,c(8:14)] < 0] <- 0
div <- data.frame(lapply(div, function(x) {gsub("1A", "1.25", x)}))
div <- data.frame(lapply(div, function(x) {gsub("1B", "1.75", x)}))
div <- data.frame(lapply(div, function(x) {gsub("4A", "4.5", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LG5", "28", x)}))
div <- data.frame(lapply(div, function(x) {gsub("LGE22", "29", x)}))
div <- data.frame(lapply(div, function(x) {gsub("Z", "0", x)}))
indx <- sapply(div, is.factor)
div[indx] <- lapply(div[indx], function(x) as.numeric(as.character(x)))
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
div <- div %>% select(-POS_ID)
div$SNP <- seq.int(nrow(div))
str(div)
## 'data.frame':    20071 obs. of  17 variables:
##  $ CHROM            : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ BIN_START        : num  1 50001 100001 150001 200001 ...
##  $ BIN_END          : num  50000 100000 150000 200000 250000 300000 350000 400000 450000 500000 ...
##  $ N_VARIANTS       : num  165 377 311 120 121 156 80 101 24 18 ...
##  $ WEIGHTED_FST_UKUS: num  0.01745 0.03751 0 0 0.00586 ...
##  $ MEAN_FST_UKUS    : num  0.0133 0.0272 0 0 0 ...
##  $ WEIGHTED_FST_AUUK: num  0.00792 0.05948 0.00728 0.01624 0.12564 ...
##  $ MEAN_FST_AUUK    : num  0.0105 0.05 0 0.0275 0.0926 ...
##  $ WEIGHTED_FST_USAU: num  0.01291 0.00258 0 0 0.00315 ...
##  $ MEAN_FST_USAU    : num  0.0093 0.00585 0 0 0 ...
##  $ PI_UK            : num  0.001053 0.002189 0.002212 0.000915 0.000879 ...
##  $ PI_US            : num  0.001063 0.002373 0.002003 0.000829 0.0007 ...
##  $ PI_AU            : num  0.001024 0.002211 0.0022 0.000804 0.000532 ...
##  $ TajimaD_UK       : num  0.808 0.293 0.877 1.426 0.968 ...
##  $ TajimaD_US       : num  0.42348 0.58012 0.80239 1.05163 -0.00424 ...
##  $ TajimaD_AU       : num  0.583 0.372 0.836 1.557 -0.557 ...
##  $ SNP              : int  1 2 3 4 5 6 7 8 9 10 ...

Adding in a calculation here so that we don’t have to repeat it when filtering div table.

div$piUK.piAU <- div$PI_UK - div$PI_AU
div$piUK.piUS <- div$PI_UK - div$PI_US

Now we’re ready to plot and calculate genome-wide values!

Exploring genetic variation

First, we look at the distribution of variation across the genome.

Manhattan plots

manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.UKUS.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen 
##                 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Fst.AUUK.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen 
##                 2
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.USAU.Manhattan.pdf",w=12,h=3)
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_USAU", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))
dev.off()
## quartz_off_screen 
##                 2

Identifying outliers

quantile(div$WEIGHTED_FST_AUUK, c(.9,.95,.99,.999)) 
##       90%       95%       99%     99.9% 
## 0.0676462 0.0862444 0.1490122 0.3077638
quantile(div$WEIGHTED_FST_UKUS, c(.9,.95,.99,.999)) 
##       90%       95%       99%     99.9% 
## 0.0441469 0.0609766 0.1156152 0.2228447
mean(div$WEIGHTED_FST_AUUK) + 5*sd(div$WEIGHTED_FST_AUUK)
## [1] 0.1936444
mean(div$WEIGHTED_FST_UKUS) + 5*sd(div$WEIGHTED_FST_UKUS)
## [1] 0.1406712
div.outliers.AUUK <- div[which(div$WEIGHTED_FST_AUUK > quantile(div$WEIGHTED_FST_AUUK,.99)),]
div.outliers.USUK <- div[which(div$WEIGHTED_FST_UKUS > quantile(div$WEIGHTED_FST_UKUS,.99)),]

div.hifst.AUUK <- div[which(div$WEIGHTED_FST_AUUK > 0.1),]
div.hifst.UKUS <- div[which(div$WEIGHTED_FST_UKUS > 0.1),]

unique(div.outliers.USUK$CHROM)
##  [1] 11.00 12.00 13.00 17.00 19.00  1.25  1.00 28.00  2.00  3.00  4.50  4.00
## [13]  6.00 29.00  0.00
length(div.outliers.USUK$SNP)
## [1] 201
write.csv(div.outliers.USUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.USUK.csv")

unique(div.outliers.AUUK$CHROM)
##  [1] 10.00 12.00 13.00 17.00 18.00  1.25  1.00 23.00 27.00  2.00  3.00  4.50
## [13]  4.00  5.00  6.00  7.00  8.00  0.00
length(div.outliers.AUUK$SNP)
## [1] 201
write.csv(div.outliers.AUUK,"/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstOutliers.AUUK.csv")

What’s going on w/ other metrics at these outliers?

summary(div.outliers.AUUK$PI_UK)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000025 0.0000730 0.0001890 0.0009734 0.0014188 0.0057224
summary(div.outliers.AUUK$PI_AU)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.383e-05 7.067e-05 2.285e-04 9.324e-04 1.447e-03 5.313e-03
summary(div.outliers.AUUK$TajimaD_AU)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.3050  0.2574  0.7501  0.6479  1.2528  2.6922
summary(div.outliers.AUUK$TajimaD_UK)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.2937  0.1481  0.6493  0.5926  1.0098  2.3588
summary(div.outliers.USUK$PI_US)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 7.167e-06 6.250e-05 2.890e-04 8.204e-04 1.281e-03 5.202e-03
summary(div.outliers.USUK$PI_UK)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000210 0.0000765 0.0002912 0.0008650 0.0012335 0.0058409
summary(div.outliers.USUK$TajimaD_US)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.1488 -0.4076  0.4343  0.3082  0.9422  2.9728
summary(div.outliers.USUK$TajimaD_UK)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.9252  0.3047  0.8963  0.8645  1.4612  2.5895
library(fitdistrplus)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: survival
descdist(div.outliers.AUUK$PI_UK)

## summary statistics
## ------
## min:  2.5e-06   max:  0.00572238 
## median:  0.000189 
## mean:  0.0009734123 
## estimated sd:  0.001348425 
## estimated skewness:  1.597318 
## estimated kurtosis:  4.653
descdist(div.outliers.AUUK$PI_AU)

## summary statistics
## ------
## min:  1.38333e-05   max:  0.00531275 
## median:  0.0002285 
## mean:  0.0009324382 
## estimated sd:  0.001252321 
## estimated skewness:  1.532558 
## estimated kurtosis:  4.458218
descdist(div.outliers.USUK$PI_UK)

## summary statistics
## ------
## min:  2.1e-05   max:  0.00584092 
## median:  0.000291167 
## mean:  0.000865046 
## estimated sd:  0.001150262 
## estimated skewness:  1.693987 
## estimated kurtosis:  5.349374
descdist(div.outliers.USUK$PI_US)

## summary statistics
## ------
## min:  7.16667e-06   max:  0.00520175 
## median:  0.000289007 
## mean:  0.0008203641 
## estimated sd:  0.001078227 
## estimated skewness:  1.65203 
## estimated kurtosis:  5.131592
#beta.outliers.AUUK <- fitdist(div.outliers.AUUK$PI_AU, "beta")
#summary(beta.outliers.AUUK)

#beta.outliers.USUK <- fitdist(div.outliers.USUK$PI_US, "beta")
#summary(beta.outliers.USUK)
div.outliers.AUUK.lowFstUSAU <- div.outliers.AUUK[which(div.outliers.AUUK$WEIGHTED_FST_USAU < 0.01 ),] 
div.outliers.AUUK.lowFstUSAU
##       CHROM BIN_START  BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 7     10.00    300001   350000         80         0.1027220     0.0600280
## 9     10.00    400001   450000         24         0.0578802     0.0408439
## 10    10.00    450001   500000         18         0.0641170     0.0438801
## 903   12.00   3500001  3550000        313         0.1841560     0.1390400
## 1414  13.00   8300001  8350000          6         0.0888562     0.0764819
## 2251  17.00   2350001  2400000         90         0.0718635     0.0453814
## 3399   1.25  25150001 25200000         80         0.1291380     0.1210420
## 3400   1.25  25200001 25250000         65         0.0763648     0.0755731
## 3793   1.25  44850001 44900000          7         0.1502550     0.1512780
## 3946   1.25  52500001 52550000          9         0.1002160     0.0841356
## 13219  4.50   6100001  6150000        132         0.1381170     0.1304820
## 13220  4.50   6150001  6200000         84         0.1382960     0.1325750
## 16265  6.00   5500001  5550000         61         0.2868590     0.2827380
## 16267  6.00   5600001  5650000         66         0.2488270     0.2538260
## 16268  6.00   5650001  5700000         90         0.2626990     0.2607650
## 16271  6.00   5800001  5850000         95         0.1569920     0.1609480
## 19739  0.00  51900001 51950000         60         0.1874130     0.1492570
## 19962  0.00  63050001 63100000        232         0.1055780     0.0686902
## 19999  0.00  64900001 64950000        186         0.1765470     0.1102810
## 20000  0.00  64950001 65000000        240         0.2637760     0.1947360
## 20002  0.00  65050001 65100000        450         0.1136620     0.0975800
##       WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 7              0.190274      0.122576       0.000000000   0.000000000
## 9              0.150496      0.109492       0.000000000   0.000000000
## 10             0.197232      0.155583       0.000000000   0.000000000
## 903            0.172857      0.132176       0.000000000   0.000000000
## 1414           0.165927      0.125393       0.004134130   0.019587600
## 2251           0.179935      0.149568       0.000000000   0.000475237
## 3399           0.187165      0.173330       0.000000000   0.000000000
## 3400           0.179424      0.168180       0.000000000   0.000000000
## 3793           0.192503      0.166546       0.008088430   0.000000000
## 3946           0.157895      0.133751       0.000000000   0.000000000
## 13219          0.149469      0.141418       0.000000000   0.000000000
## 13220          0.164826      0.153364       0.000000000   0.000000000
## 16265          0.178806      0.178051       0.000000000   0.000000000
## 16267          0.155095      0.162605       0.000000000   0.000000000
## 16268          0.172276      0.167504       0.000000000   0.010494800
## 16271          0.155570      0.151752       0.004985280   0.006458370
## 19739          0.158887      0.126400       0.006943530   0.004539620
## 19962          0.159585      0.111195       0.000000000   0.003472230
## 19999          0.250744      0.187151       0.004209500   0.002449910
## 20000          0.230248      0.162655       0.000000000   0.000000000
## 20002          0.171135      0.133933       0.000369775   0.000000000
##             PI_UK       PI_US       PI_AU TajimaD_UK TajimaD_US TajimaD_AU
## 7     5.80833e-04 3.92847e-04 2.90669e-04   1.010540  -0.569214  -1.333590
## 9     1.45667e-04 1.08668e-04 7.26667e-05   0.674272  -0.938436  -1.463810
## 10    1.15667e-04 7.03333e-05 3.06667e-05   0.510990  -1.402140  -0.520315
## 903   1.58583e-03 2.29027e-03 2.38120e-03   0.322469   1.214250   1.230810
## 1414  3.53333e-05 4.86667e-05 5.13333e-05  -0.078611   2.026320   1.435800
## 2251  5.73500e-04 5.66689e-04 5.08838e-04   0.877600   0.367470   0.645267
## 3399  6.12500e-04 8.16000e-04 7.46855e-04   1.160520   2.972800   2.474060
## 3400  4.95167e-04 6.38006e-04 5.69339e-04   1.386520   2.737810   2.391710
## 3793  6.46667e-05 4.11669e-05 4.38335e-05   1.877030   0.118964   0.239407
## 3946  7.31667e-05 6.43336e-05 5.60000e-05   1.281030   0.765878   0.581803
## 13219 1.16983e-03 4.09190e-04 3.88846e-04   2.126910  -2.148800  -2.179840
## 13220 7.55674e-04 2.66003e-04 2.38502e-04   2.154800  -2.022640  -2.304990
## 16265 2.88333e-05 5.33346e-04 4.75333e-04   1.723710   2.056500   1.339670
## 16267 5.08333e-05 5.92009e-04 5.25841e-04   0.721746   2.161640   1.510490
## 16268 6.93333e-05 7.88034e-04 6.67837e-04  -1.293670   2.136270   1.375800
## 16271 1.58333e-04 6.64365e-04 7.56515e-04  -0.174387   1.129640   1.427160
## 19739 3.98671e-04 3.79014e-04 4.32010e-04   1.206870   0.930709   1.050540
## 19962 1.55317e-03 1.37795e-03 1.14304e-03   0.640391   0.573849   0.685643
## 19999 8.16667e-04 1.26139e-03 1.36611e-03  -0.243400   0.635922   1.156450
## 20000 1.24650e-03 1.72167e-03 1.74439e-03  -0.207223   1.104400   1.037880
## 20002 2.43118e-03 3.10507e-03 2.86397e-03   0.924093   0.736868   0.386692
##         SNP     piUK.piAU     piUK.piUS
## 7         7  0.0002901640  0.0001879860
## 9         9  0.0000730003  0.0000369990
## 10       10  0.0000850003  0.0000453337
## 903     903 -0.0007953700 -0.0007044400
## 1414   1414 -0.0000160000 -0.0000133334
## 2251   2251  0.0000646620  0.0000068110
## 3399   3399 -0.0001343550 -0.0002035000
## 3400   3400 -0.0000741720 -0.0001428390
## 3793   3793  0.0000208332  0.0000234998
## 3946   3946  0.0000171667  0.0000088331
## 13219 13219  0.0007809840  0.0007606400
## 13220 13220  0.0005171720  0.0004896710
## 16265 16265 -0.0004464997 -0.0005045127
## 16267 16267 -0.0004750077 -0.0005411757
## 16268 16268 -0.0005985037 -0.0007187007
## 16271 16271 -0.0005981820 -0.0005060320
## 19739 19739 -0.0000333390  0.0000196570
## 19962 19962  0.0004101300  0.0001752200
## 19999 19999 -0.0005494430 -0.0004447230
## 20000 20000 -0.0004978900 -0.0004751700
## 20002 20002 -0.0004327900 -0.0006738900
length(div.outliers.AUUK.lowFstUSAU$SNP)
## [1] 21
div.outliers.USUK.lowFstUSAU <- div.outliers.USUK[which(div.outliers.USUK$WEIGHTED_FST_USAU < 0.01 ),] 
div.outliers.USUK.lowFstUSAU
##       CHROM BIN_START   BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 903   12.00   3500001   3550000        313          0.184156     0.1390400
## 3399   1.25  25150001  25200000         80          0.129138     0.1210420
## 3772   1.25  43800001  43850000          9          0.233478     0.2201170
## 3773   1.25  43850001  43900000          9          0.164466     0.1563530
## 3774   1.25  43900001  43950000          8          0.222953     0.2224730
## 3781   1.25  44250001  44300000          8          0.198336     0.1773510
## 3788   1.25  44600001  44650000          8          0.183673     0.1641710
## 3790   1.25  44700001  44750000          9          0.198634     0.1724300
## 3791   1.25  44750001  44800000          4          0.126050     0.1050630
## 3792   1.25  44800001  44850000          5          0.158329     0.1520360
## 3793   1.25  44850001  44900000          7          0.150255     0.1512780
## 3826   1.25  46500001  46550000         16          0.119742     0.1104310
## 3941   1.25  52250001  52300000          6          0.144998     0.1072330
## 3950   1.25  52700001  52750000          3          0.122153     0.0888192
## 6514   1.00 106850001 106900000         40          0.132203     0.1245470
## 13217  4.50   6000001   6050000         87          0.127251     0.1166620
## 13218  4.50   6050001   6100000        122          0.124097     0.1208020
## 13219  4.50   6100001   6150000        132          0.138117     0.1304820
## 13220  4.50   6150001   6200000         84          0.138296     0.1325750
## 14057  4.00  27600001  27650000         43          0.124484     0.1000850
## 14058  4.00  27650001  27700000         15          0.124772     0.1110270
## 14243  4.00  36900001  36950000        120          0.144461     0.1311450
## 16262  6.00   5350001   5400000         23          0.152651     0.1426320
## 16265  6.00   5500001   5550000         61          0.286859     0.2827380
## 16266  6.00   5550001   5600000         76          0.236749     0.2467770
## 16267  6.00   5600001   5650000         66          0.248827     0.2538260
## 16268  6.00   5650001   5700000         90          0.262699     0.2607650
## 16269  6.00   5700001   5750000        118          0.200669     0.1998990
## 16270  6.00   5750001   5800000         84          0.189015     0.1918170
## 16271  6.00   5800001   5850000         95          0.156992     0.1609480
## 19081  0.00  18950001  19000000        222          0.189965     0.1459400
## 19082  0.00  19000001  19050000        193          0.145196     0.1232680
## 19739  0.00  51900001  51950000         60          0.187413     0.1492570
## 19800  0.00  54950001  55000000        332          0.138968     0.1067000
## 19932  0.00  61550001  61600000        113          0.165185     0.1225240
## 19999  0.00  64900001  64950000        186          0.176547     0.1102810
## 20000  0.00  64950001  65000000        240          0.263776     0.1947360
## 20001  0.00  65000001  65050000        277          0.128128     0.0954193
## 20006  0.00  65250001  65300000        194          0.175280     0.1503040
##       WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 903           0.1728570     0.1321760        0.00000000    0.00000000
## 3399          0.1871650     0.1733300        0.00000000    0.00000000
## 3772          0.1356170     0.1306400        0.00000000    0.00000000
## 3773          0.0852238     0.0765575        0.00000000    0.00000000
## 3774          0.1335340     0.1317840        0.00000000    0.00000000
## 3781          0.1177940     0.1036950        0.00646088    0.00000000
## 3788          0.1177940     0.1036950        0.00209059    0.00000000
## 3790          0.1181560     0.1062690        0.00000000    0.00000000
## 3791          0.1087810     0.0889597        0.00000000    0.00000000
## 3792          0.1012870     0.1007000        0.00000000    0.00000000
## 3793          0.1925030     0.1665460        0.00808843    0.00000000
## 3826          0.1379380     0.1391420        0.00000000    0.00899771
## 3941          0.0853294     0.0588546        0.00000000    0.00000000
## 3950          0.0778325     0.0539009        0.00000000    0.00000000
## 6514          0.1120650     0.1021280        0.00000000    0.00000000
## 13217         0.1235350     0.1125110        0.00000000    0.00000000
## 13218         0.1407820     0.1299840        0.00000000    0.00000000
## 13219         0.1494690     0.1414180        0.00000000    0.00000000
## 13220         0.1648260     0.1533640        0.00000000    0.00000000
## 14057         0.0421729     0.0368661        0.00000000    0.00000000
## 14058         0.0708094     0.0544110        0.00000000    0.00000000
## 14243         0.0396591     0.0321878        0.00000000    0.00000000
## 16262         0.1195050     0.1207770        0.00000000    0.00000000
## 16265         0.1788060     0.1780510        0.00000000    0.00000000
## 16266         0.1419130     0.1501990        0.00000000    0.00000000
## 16267         0.1550950     0.1626050        0.00000000    0.00000000
## 16268         0.1722760     0.1675040        0.00000000    0.01049480
## 16269         0.1428110     0.1457580        0.00000000    0.00000000
## 16270         0.1452970     0.1502240        0.00000000    0.00000000
## 16271         0.1555700     0.1517520        0.00498528    0.00645837
## 19081         0.0750444     0.0538881        0.00000000    0.00000000
## 19082         0.0193002     0.0267927        0.00000000    0.00000000
## 19739         0.1588870     0.1264000        0.00694353    0.00453962
## 19800         0.0791149     0.0666468        0.00000000    0.00000000
## 19932         0.0833852     0.0580127        0.00000000    0.00000000
## 19999         0.2507440     0.1871510        0.00420950    0.00244991
## 20000         0.2302480     0.1626550        0.00000000    0.00000000
## 20001         0.1301150     0.1041760        0.00000000    0.00000000
## 20006         0.1216110     0.1062280        0.00000000    0.00000000
##             PI_UK       PI_US       PI_AU TajimaD_UK TajimaD_US TajimaD_AU
## 903   1.58583e-03 2.29027e-03 2.38120e-03   0.322469   1.214250  1.2308100
## 3399  6.12500e-04 8.16000e-04 7.46855e-04   1.160520   2.972800  2.4740600
## 3772  9.01667e-05 2.65000e-05 4.65000e-05   2.432020  -1.878520 -0.5244280
## 3773  8.30000e-05 4.03333e-05 4.91667e-05   1.946800  -0.941941 -0.3438820
## 3774  8.16667e-05 2.76668e-05 4.20002e-05   2.500830  -1.353500 -0.3733890
## 3781  7.41667e-05 2.76667e-05 3.45000e-05   1.940080  -1.536610 -0.6423260
## 3788  7.41667e-05 2.98333e-05 3.45000e-05   1.940080  -1.374610 -0.6423260
## 3790  8.61667e-05 2.65001e-05 3.85002e-05   2.161200  -1.833620 -0.6350750
## 3791  3.55000e-05 1.96667e-05 2.45000e-05   1.478110  -0.576488  0.0507059
## 3792  4.75000e-05 2.45000e-05 3.60003e-05   1.898740  -0.616373  0.2162880
## 3793  6.46667e-05 4.11669e-05 4.38335e-05   1.877030   0.118964  0.2394070
## 3826  1.56500e-04 1.14501e-04 1.17334e-04   2.455710   0.890794  1.0316700
## 3941  4.66669e-05 1.55001e-05 2.20002e-05   1.110000  -1.317040 -0.6062470
## 3950  2.56667e-05 1.18333e-05 1.58333e-05   1.216020  -1.001800 -0.3605060
## 6514  2.03833e-04 3.69672e-04 3.52672e-04  -0.557074   2.328280  2.7553400
## 13217 7.23833e-04 2.57341e-04 3.00170e-04   1.775250  -2.075340 -1.8571700
## 13218 1.04117e-03 4.05516e-04 3.68677e-04   2.007180  -1.910490 -2.1351400
## 13219 1.16983e-03 4.09190e-04 3.88846e-04   2.126910  -2.148800 -2.1798400
## 13220 7.55674e-04 2.66003e-04 2.38502e-04   2.154800  -2.022640 -2.3049900
## 14057 3.81000e-04 3.27002e-04 3.94502e-04   2.124500   1.261110  2.2358600
## 14058 1.20500e-04 9.08347e-05 1.06668e-04   1.303780   1.346540  1.2527000
## 14243 1.05652e-03 8.72538e-04 9.60703e-04   2.022060   1.051440  1.8017000
## 16262 5.05000e-05 1.48335e-04 1.75833e-04  -0.253609   0.363975  0.8790340
## 16265 2.88333e-05 5.33346e-04 4.75333e-04   1.723710   2.056500  1.3396700
## 16266 9.40005e-05 6.90347e-04 6.21012e-04   0.189762   2.226990  1.4170800
## 16267 5.08333e-05 5.92009e-04 5.25841e-04   0.721746   2.161640  1.5104900
## 16268 6.93333e-05 7.88034e-04 6.67837e-04  -1.293670   2.136270  1.3758000
## 16269 1.06667e-04 9.61546e-04 9.50343e-04  -1.925220   1.627650  1.3995400
## 16270 7.71667e-05 6.45525e-04 6.42512e-04  -0.682127   1.393540  1.2760900
## 16271 1.58333e-04 6.64365e-04 7.56515e-04  -0.174387   1.129640  1.4271600
## 19081 1.18901e-03 1.24461e-03 1.22436e-03   0.862748   0.758027  0.7976620
## 19082 1.11667e-03 1.30971e-03 1.30786e-03   2.029490   0.784797  1.0812900
## 19739 3.98671e-04 3.79014e-04 4.32010e-04   1.206870   0.930709  1.0505400
## 19800 2.37000e-03 1.99995e-03 2.30563e-03   1.246390   0.748836  1.2112400
## 19932 6.72170e-04 8.29064e-04 8.34358e-04   0.502783   1.071620  1.1047700
## 19999 8.16667e-04 1.26139e-03 1.36611e-03  -0.243400   0.635922  1.1564500
## 20000 1.24650e-03 1.72167e-03 1.74439e-03  -0.207223   1.104400  1.0378800
## 20001 1.53219e-03 1.96551e-03 2.01766e-03   0.304739   1.123120  1.3799400
## 20006 1.57217e-03 1.30438e-03 1.47139e-03   1.530390   1.099090  1.1772900
##         SNP     piUK.piAU     piUK.piUS
## 903     903 -0.0007953700 -0.0007044400
## 3399   3399 -0.0001343550 -0.0002035000
## 3772   3772  0.0000436667  0.0000636667
## 3773   3773  0.0000338333  0.0000426667
## 3774   3774  0.0000396665  0.0000539999
## 3781   3781  0.0000396667  0.0000465000
## 3788   3788  0.0000396667  0.0000443334
## 3790   3790  0.0000476665  0.0000596666
## 3791   3791  0.0000110000  0.0000158333
## 3792   3792  0.0000114997  0.0000230000
## 3793   3793  0.0000208332  0.0000234998
## 3826   3826  0.0000391660  0.0000419990
## 3941   3941  0.0000246667  0.0000311668
## 3950   3950  0.0000098334  0.0000138334
## 6514   6514 -0.0001488390 -0.0001658390
## 13217 13217  0.0004236630  0.0004664920
## 13218 13218  0.0006724930  0.0006356540
## 13219 13219  0.0007809840  0.0007606400
## 13220 13220  0.0005171720  0.0004896710
## 14057 14057 -0.0000135020  0.0000539980
## 14058 14058  0.0000138320  0.0000296653
## 14243 14243  0.0000958170  0.0001839820
## 16262 16262 -0.0001253330 -0.0000978350
## 16265 16265 -0.0004464997 -0.0005045127
## 16266 16266 -0.0005270115 -0.0005963465
## 16267 16267 -0.0004750077 -0.0005411757
## 16268 16268 -0.0005985037 -0.0007187007
## 16269 16269 -0.0008436760 -0.0008548790
## 16270 16270 -0.0005653453 -0.0005683583
## 16271 16271 -0.0005981820 -0.0005060320
## 19081 19081 -0.0000353500 -0.0000556000
## 19082 19082 -0.0001911900 -0.0001930400
## 19739 19739 -0.0000333390  0.0000196570
## 19800 19800  0.0000643700  0.0003700500
## 19932 19932 -0.0001621880 -0.0001568940
## 19999 19999 -0.0005494430 -0.0004447230
## 20000 20000 -0.0004978900 -0.0004751700
## 20001 20001 -0.0004854700 -0.0004333200
## 20006 20006  0.0001007800  0.0002677900
length(div.outliers.USUK.lowFstUSAU$SNP)
## [1] 39

Possible parallel “selection” ?

Histograms of FST, pi

What’s the statistical distribution of these values?

descdist(div$WEIGHTED_FST_AUUK)

## summary statistics
## ------
## min:  0   max:  0.401902 
## median:  0.0265902 
## mean:  0.03258447 
## estimated sd:  0.03221199 
## estimated skewness:  2.902543 
## estimated kurtosis:  19.80308
descdist(div$WEIGHTED_FST_UKUS)

## summary statistics
## ------
## min:  0   max:  0.34149 
## median:  0.0125022 
## mean:  0.01873338 
## estimated sd:  0.02438756 
## estimated skewness:  3.325395 
## estimated kurtosis:  22.98486
descdist(div$WEIGHTED_FST_USAU)

## summary statistics
## ------
## min:  0   max:  0.482831 
## median:  0.0341419 
## mean:  0.04037524 
## estimated sd:  0.03744596 
## estimated skewness:  3.350824 
## estimated kurtosis:  25.99446
lab.AU <- rep("AU.UK",length(div$WEIGHTED_FST_AUUK))
lab.US <- rep("UK.US",length(div$WEIGHTED_FST_UKUS))
Fst.group <- c(lab.AU,lab.US)
Fst.hist.data <- c(div$WEIGHTED_FST_AUUK,div$WEIGHTED_FST_USUK)
Fst.hist <- data.frame(Fst = Fst.hist.data, population = Fst.group)
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Fst.pdf",width=4,height=3)
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
  theme_classic() +
  geom_density(alpha=0.5,lwd=0.5) +
  scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) + 
  xlab("Fst") + labs(fill="Population") + 
  geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)
dev.off()
## quartz_off_screen 
##                 2
ggplot(Fst.hist, aes(x=Fst, y=..density.., fill=population)) +
  theme_classic() +
  geom_density(alpha=0.5,lwd=0.5) +
  scale_fill_manual(values=c("#F2C14E","#2c81a8")) + xlim(0,0.5) + 
  xlab("Fst") + labs(fill="Population") + 
  geom_vline(xintercept=0.03,colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.01,colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=0.08,colour=alpha("gray50"),linetype="dotted", size=0.5)

ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)
ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_UKUS, y=div$PI_US),col="#2c81a8",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=PI_US),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic()
## Warning: Use of `div$WEIGHTED_FST_UKUS` is discouraged. Use `WEIGHTED_FST_UKUS`
## instead.
## Warning: Use of `div$PI_US` is discouraged. Use `PI_US` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
  xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.AUUK.pdf",height=2,width=2)
ggplot(data=div) +
  geom_point(aes(x=div$WEIGHTED_FST_AUUK, y=div$PI_AU),col="#F2C14E",cex=0.7) +
  xlim(0,0.31) + ylim(0,0.04) + theme_classic() +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=PI_AU),method="loess",col="black",lwd=0.5) +
  xlab("") + ylab("")
## Warning: Use of `div$WEIGHTED_FST_AUUK` is discouraged. Use `WEIGHTED_FST_AUUK`
## instead.
## Warning: Use of `div$PI_AU` is discouraged. Use `PI_AU` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
summary(div$PI_AU)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000025 0.0027682 0.0040090 0.0038645 0.0050565 0.0138998
summary(div$PI_UK)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000025 0.0028343 0.0041312 0.0039757 0.0051943 0.0141869
summary(div$PI_US)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 4.667e-06 2.753e-03 4.016e-03 3.863e-03 5.045e-03 1.409e-02
descdist(div$PI_US)

## summary statistics
## ------
## min:  4.66667e-06   max:  0.0140869 
## median:  0.00401555 
## mean:  0.003862564 
## estimated sd:  0.001783127 
## estimated skewness:  -0.1258447 
## estimated kurtosis:  3.05688
#norm.piUS.fit <- fitdist(div$PI_US,distr="norm")
#summary(norm.piUS.fit)

descdist(div$PI_UK)

## summary statistics
## ------
## min:  2.5e-06   max:  0.0141869 
## median:  0.00413121 
## mean:  0.003975727 
## estimated sd:  0.001837387 
## estimated skewness:  -0.1188124 
## estimated kurtosis:  3.055468
#norm.piUK.fit <- fitdist(div$PI_UK,distr="norm")
#summary(norm.piUK.fit)

descdist(div$PI_AU)

## summary statistics
## ------
## min:  2.5e-06   max:  0.0138998 
## median:  0.00400898 
## mean:  0.003864496 
## estimated sd:  0.001775347 
## estimated skewness:  -0.1396863 
## estimated kurtosis:  3.030743
#norm.piAU.fit <- fitdist(div$PI_AU,distr="norm")
#summary(norm.piAU.fit)

lab.AU <- rep("AU",length(div$PI_AU))
lab.US <- rep("US",length(div$PI_US))
lab.UK <- rep("UK",length(div$PI_UK))
group <- c(lab.AU,lab.US,lab.UK)
pi.hist.data <- c(div$PI_UK,div$PI_US,div$PI_AU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame':    60213 obs. of  2 variables:
##  $ pi        : num  0.001053 0.002189 0.002212 0.000915 0.000879 ...
##  $ population: Factor w/ 3 levels "AU","UK","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) + 
  xlab("Pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  theme(legend.position="none")

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_Pi.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("black","#2c81a8","#F2C14E")) + xlim(-0.0001,0.02) + 
  xlab("Pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$PI_US),colour=alpha("#2c81a8"),linetype="dashed", size=1) +
  geom_vline(xintercept=mean(div$PI_AU),colour=alpha("#F2C14E"),linetype="dashed", size=1) +
  theme(legend.position="none")
dev.off()
## quartz_off_screen 
##                 2

Average nucleotide diversity for both invasions is the same (0.003). There are two vertical lines overlaid in the plot above.

ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_USvsUK.pdf",width=2,height=2)
ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div.outliers.USUK) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/PiOutliers_USvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.USUK) +
  geom_point(aes(x=PI_UK, y=PI_US),col="#2c81a8",cex=0.7) +
  xlab("") + ylab("") + xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_US),span=0.2,method="loess",col="black",lwd=0.5)
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.02) + ylim(0,0.02) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2
ggplot(data=div.outliers.AUUK) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).

dev.off()
## null device 
##           1
pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Pi_AUvsUK.pdf",width=2,height=2)
ggplot(data=div.outliers.AUUK) +
  geom_point(aes(x=PI_UK, y=PI_AU),col="#F2C14E",cex=0.7) +
  xlab("") + ylab("") +
  xlim(0,0.01) + ylim(0,0.01) + theme_classic() +
  theme(axis.text=element_text(size=7,colour="black")) +
  stat_smooth(aes(x=PI_UK, y=PI_AU),span=0.2,method="loess",col="black",lwd=0.5) 
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (geom_smooth).
dev.off()
## null device 
##           1
descdist(div$TajimaD_US)

## summary statistics
## ------
## min:  -2.1488   max:  3.16509 
## median:  0.744616 
## mean:  0.7115798 
## estimated sd:  0.3466121 
## estimated skewness:  -1.309557 
## estimated kurtosis:  13.47233
descdist(div$TajimaD_UK)

## summary statistics
## ------
## min:  -2.2276   max:  3.23445 
## median:  0.737875 
## mean:  0.7152536 
## estimated sd:  0.3263563 
## estimated skewness:  -0.4741872 
## estimated kurtosis:  12.17869
descdist(div$TajimaD_AU)

## summary statistics
## ------
## min:  -2.33194   max:  3.16824 
## median:  0.798065 
## mean:  0.7829885 
## estimated sd:  0.3372602 
## estimated skewness:  -0.5499719 
## estimated kurtosis:  12.87531
descdist(div.outliers.USUK$TajimaD_US)

## summary statistics
## ------
## min:  -2.1488   max:  2.9728 
## median:  0.434343 
## mean:  0.3082123 
## estimated sd:  1.017184 
## estimated skewness:  -0.2567428 
## estimated kurtosis:  2.728269
descdist(div.outliers.AUUK$TajimaD_AU)

## summary statistics
## ------
## min:  -2.30499   max:  2.69219 
## median:  0.750086 
## mean:  0.6478708 
## estimated sd:  0.8718095 
## estimated skewness:  -0.7820734 
## estimated kurtosis:  3.98327

Step 4: What’s the impact of the genetic bottlenecks and subsequent expansion on genetic diversity in each invasion?

What’s the difference in diversity between native and invasive ranges?

When comparing ancestral diversity to a bottlenecked invasive population, we’d expect to see lower diversity in the invasive range overall. This difference would be more pronounced in regions that had differentiated from the native range (e.g., where drift and/or selection are more pronounced).

Here, difference in pi > 0 means that diversity is higher in the native range than in the invasive.

summary(div$piUK.piAU)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -1.552e-03 -1.935e-05  9.278e-05  1.112e-04  2.319e-04  2.028e-03
qqnorm(div$piUK.piAU, pch = 16)
qqline(div$piUK.piAU, pch = 16)

descdist(div$piUK.piAU)

## summary statistics
## ------
## min:  -0.00155244   max:  0.00202796 
## median:  9.278e-05 
## mean:  0.0001112317 
## estimated sd:  0.0002326892 
## estimated skewness:  0.4570582 
## estimated kurtosis:  5.950369
summary(div$piUK.piUS)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -1.381e-03 -2.535e-06  1.035e-04  1.132e-04  2.277e-04  1.762e-03
qqnorm(div$piUK.piUS, pch = 16)
qqline(div$piUK.piUS, pch = 16)

descdist(div$piUK.piUS)

## summary statistics
## ------
## min:  -0.00138117   max:  0.00176181 
## median:  0.000103491 
## mean:  0.0001131636 
## estimated sd:  0.0002065841 
## estimated skewness:  0.1527603 
## estimated kurtosis:  5.842408
div.hiUKpi.vsAU <- div[which(div$piUK.piAU > 0),]
div.hiUKpi.vsUS <- div[which(div$piUK.piUS > 0),]
div.hiUKpi.both <- div.hiUKpi.vsAU[which(div$piUK.piUS > 0),]

length(div.hiUKpi.vsAU$piUK.piAU)/length(div$piUK.piAU) # % of windows that have higher pi in native
## [1] 0.7049973
length(div.hiUKpi.vsUS$piUK.piUS)/length(div$piUK.piUS) 
## [1] 0.7432116
length(div.hiUKpi.both$piUK.piUS)/length(div$piUK.piUS) # % of windows with higher pi in native for both invasive ranges
## [1] 0.7432116

If drift acting on novel mutations explains most of the differentiation, then where FST is high, diversity should also be higher in the invasions. Could also be due to weaker purifying selection during population expansion.

descdist(div.hifst.AUUK$piUK.piAU)

## summary statistics
## ------
## min:  -0.00119697   max:  0.00202796 
## median:  3.283325e-05 
## mean:  0.0001246263 
## estimated sd:  0.0004012797 
## estimated skewness:  0.807314 
## estimated kurtosis:  4.950103
div.hifst.AUUK.hiUKpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU > 0),]
length(div.hifst.AUUK.hiUKpi$piUK.piAU)/length(div.hifst.AUUK$piUK.piAU) # % of high FST windows that have higher pi in native
## [1] 0.6224189
descdist(div.hifst.UKUS$piUK.piUS)

## summary statistics
## ------
## min:  -0.00119758   max:  0.00176181 
## median:  2.5496e-05 
## mean:  6.588734e-05 
## estimated sd:  0.0003507603 
## estimated skewness:  0.6636127 
## estimated kurtosis:  6.832871
div.hifst.UKUS.hiUKpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS > 0),]
length(div.hifst.UKUS.hiUKpi$piUK.piUS)/length(div.hifst.UKUS$piUK.piUS) 
## [1] 0.6989619
div.hiUKpi.both <- div.hifst.UKUS.hiUKpi[which(div.hifst.AUUK.hiUKpi$piUK.piUS > 0),]
length(div.hiUKpi.both$piUK.piUS)
## [1] 295
div.hifst.AUUK.hiAUpi <- div.hifst.AUUK[which(div.hifst.AUUK$piUK.piAU < 0),]
div.hifst.UKUS.hiUSpi <- div.hifst.UKUS[which(div.hifst.UKUS$piUK.piUS < 0),]

Now to plot these differences.

lab.AU <- rep("AU",length(div$piUK.piAU))
lab.US <- rep("US",length(div$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div$piUK.piUS,div$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame':    40142 obs. of  2 variables:
##  $ pi        : num  -1.04e-05 -1.83e-04 2.09e-04 8.63e-05 1.79e-04 ...
##  $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")
dev.off()
## quartz_off_screen 
##                 2

Plotting high-fst (FST > 0.1) windows only

lab.AU <- rep("AU",length(div.hifst.AUUK$piUK.piAU))
lab.US <- rep("US",length(div.hifst.UKUS$piUK.piUS))
group <- c(lab.AU,lab.US)
pi.hist.data <- c(div.hifst.UKUS$piUK.piUS,div.hifst.AUUK$piUK.piAU)
pi.hist.lab <- data.frame(pi = pi.hist.data, population = group)
str(pi.hist.lab)
## 'data.frame':    967 obs. of  2 variables:
##  $ pi        : num  1.88e-04 8.97e-05 1.13e-04 4.36e-04 2.19e-04 ...
##  $ population: Factor w/ 2 levels "AU","US": 1 1 1 1 1 1 1 1 1 1 ...
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/HistDensity_PiDifference_HiFst.pdf",width=4,height=3)
ggplot(pi.hist.lab, aes(x=pi, y=..density.., fill=population)) +
  geom_vline(xintercept=0,colour="black",size=0.5) +
  geom_vline(xintercept=0.0005,colour="black",size=0.5) +
  geom_vline(xintercept=-0.0005,colour="black",size=0.5) +
  geom_density(alpha=0.8,lwd=0.5) + theme_classic() +
  scale_fill_manual(values=c("#2c81a8","#F2C14E")) + 
  xlab("Difference in pi") + labs(fill="Population") +
  geom_vline(xintercept=mean(div.hifst.AUUK$piUK.piAU),colour=alpha("#F2C14E"), size=1) +
  geom_vline(xintercept=mean(div.hifst.UKUS$piUK.piUS),colour=alpha("#2c81a8"), size=1) +
  theme(legend.position="none")
dev.off()
## quartz_off_screen 
##                 2

Are regions with novel pi also highly differentiated? Expect this scatterplot to look bimodal, where shifts in diversity in either direction led to differentiation between populations.

ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(-0.002,0.002) + theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  #xlab("Fst (Native vs. Invasive)") + ylab("Pi Invasive") +
  xlab("") + ylab("") +
  stat_smooth(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),method="loess",col="black",lwd=0.5) +
  xlim(0,0.31) + ylim(-0.002,0.002) + theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).

ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
  xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
  geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
  geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
  guides(col = guide_legend(label = TRUE, label.position = "bottom", 
                           direction = "horizontal"))

pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.pdf",height=5,width=5)
ggplot(data=div) +
  geom_point(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS),col="#2c81a8",cex=0.7) +
  geom_point(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU),col="#F2C14E",cex=0.7) +
  xlab("Fst (Native vs. Invasive)") + ylab("Pi Native - Pi Invasive") +
  xlim(-0.01,0.41) + ylim(-0.003,0.003) + theme_bw() +
  geom_density_2d(aes(x=WEIGHTED_FST_AUUK, y=piUK.piAU), colour="#ffffff") +
  geom_density_2d(aes(x=WEIGHTED_FST_UKUS, y=piUK.piUS), colour="#2c81a8") +
  guides(col = guide_legend(label = TRUE, label.position = "bottom", 
                           direction = "horizontal"))
dev.off()
## quartz_off_screen 
##                 2
#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/FstPi.density.USUK.pdf",height=2,width=2)

#dev.off()

In a given window, if diversity in the invasive range is higher than that of the native range, it is possible that those variants are novel mutations. This filtering will tell us whether we should look at particular genotypes in these regions.

div.novelpi <- div %>% 
  filter((div$piUK.piAU < 0) & (div$piUK.piUS < 0))

# % of low native diversity SNPs are higher in diversity in both invasions
# sanity check based on calc above
length(div.novelpi$SNP)/length(div$SNP)
## [1] 0.1386578
div.novelUSpi <- div %>%
    filter(div$piUK.piUS < 0)
length(div.novelUSpi$SNP)/length(div$SNP)
## [1] 0.2566888
div.novelAUpi <- div %>%
    filter(div$piUK.piAU < 0)
length(div.novelAUpi$SNP)/length(div$SNP)
## [1] 0.2948533

How to test whether novel pi is evenly distributed across the genome? If even, then we expect to see a random sampling of chromosomes represented in this smaller dataset.

unique(div.novelpi$CHROM) # which chromosomes have higher invasive pi in both invasions
##  [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00  1.25  1.75  1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00  2.00  3.00  4.50
## [25]  4.00  5.00  6.00  7.00  8.00  9.00 29.00  0.00
unique(div.novelUSpi$CHROM) # just in US
##  [1] 10.00 11.00 12.00 13.00 14.00 15.00 17.00 18.00 19.00  1.25  1.75  1.00
## [13] 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00  2.00  3.00  4.50
## [25]  4.00  5.00  6.00  7.00  8.00  9.00 29.00  0.00
unique(div.novelAUpi$CHROM) # in AU
##  [1] 10.00 11.00 12.00 13.00 14.00 15.00 16.00 17.00 18.00 19.00  1.25  1.75
## [13]  1.00 20.00 21.00 22.00 23.00 24.00 25.00 26.00 27.00 28.00  2.00  3.00
## [25]  4.50  4.00  5.00  6.00  7.00  8.00  9.00 29.00  0.00
length(unique(div.novelpi$CHROM))/length(unique(div$CHROM))
## [1] 0.969697
length(unique(div.novelUSpi$SNP)) # are most of these higher invasive pi regions also the moderately differentiated windows
## [1] 5152
length(unique(div.hifst.UKUS$SNP))
## [1] 289
length(unique(div.hifst.UKUS$SNP))/length(unique(div.novelUSpi$SNP))
## [1] 0.05609472
length(unique(div.novelAUpi$SNP))
## [1] 5918
length(unique(div.hifst.AUUK$SNP))
## [1] 678
length(unique(div.hifst.AUUK$SNP))/length(unique(div.novelAUpi$SNP))
## [1] 0.1145657

This is a different calculation than asking whether differentiated windows are also high in invasive diversity, since we’re drawing from different sets.

Can we color points in the manhattan plot by diff in diversity

SNP.novelUSpi <- div.novelUSpi$SNP
SNP.novelAUpi <- div.novelAUpi$SNP

SNP.novelAUpi.hifstonly <- div.hifst.AUUK.hiAUpi$SNP
SNP.novelUSpi.hifstonly <- div.hifst.UKUS.hiUSpi$SNP

length(div.hifst.UKUS.hiUSpi$SNP) 
## [1] 87
length(div.hifst.AUUK.hiAUpi$SNP)
## [1] 256
manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.UKUS.Manhattan.pdf",w=12,h=3)

#dev.off()

manhattan(div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1, chrlabs=c("Z",1,"1A","1B",2,3,4,"4A",5:29))

#pdf("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Fst.AUUK.Manhattan.pdf",w=12,h=3)

#dev.off()

What’s going on at outlier windows in particular?

div.outliers.hiAUpi <- div.outliers.AUUK[which(div.outliers.AUUK$piUK.piAU < 0),]

length(div.outliers.hiAUpi$SNP)
## [1] 91
length(div.outliers.hiAUpi$SNP)/length(div.outliers.AUUK$SNP)
## [1] 0.4527363

% of FST outlier windows have higher diversity in AU

div.outliers.hiUSpi <- div.outliers.USUK[which(div.outliers.USUK$piUK.piUS < 0),]

length(div.outliers.hiUSpi$SNP) 
## [1] 61
length(div.outliers.hiUSpi$SNP)/length(div.outliers.USUK$SNP)
## [1] 0.3034826

% of FST outlier windows have higher diversity in US

intersect(div.outliers.hiUSpi$CHROM,div.outliers.hiAUpi$CHROM)
## [1] 12.00  1.25  1.00  2.00  3.00  6.00  0.00
unique(div.outliers.hiAUpi$CHROM)
##  [1] 12.00 13.00  1.25  1.00 23.00 27.00  2.00  3.00  4.50  5.00  6.00  0.00
unique(div.outliers.hiUSpi$CHROM) 
## [1] 12.00 17.00  1.25  1.00  2.00  3.00  4.00  6.00  0.00

Figures: Diversity underlying candidate peaks

The plots below are based on code from Gemma Clucas.

Chromosome 2

chrom2.div <- div[which(div$CHROM==2),]

manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom2.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom2.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom2.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom2.div.small <- chrom2.div[which(chrom2.div$SNP < 8980),]
chrom2.div.small <- chrom2.div.small[which(chrom2.div.small$SNP > 8600),]
# 39200001 to 51650000

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1)) # set rows
par(mar=c(0,2,0.5,2)) # set margins for each plot
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.5))
lines((chrom2.div.small$BIN_START), chrom2.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.5))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.04))
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom2.div.small$BIN_START), chrom2.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,2.6))
lines((chrom2.div.small$BIN_START), chrom2.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4)) # tajima's D axis
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/Chromosome2.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 6

chrom6.div <- div[which(div$CHROM==6),]
# runs from 16155 to 16871
# chrom 6 = 716 50kb windows ("SNPs" here)

manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom6.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom6.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom6.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom6.div.small <- chrom6.div[which(chrom6.div$SNP < 16325),]
chrom6.div.small <- chrom6.div.small[which(chrom6.div.small$SNP > 16225),]

head(chrom6.div.small)
##       CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST_UKUS MEAN_FST_UKUS
## 16226     6   3550001 3600000        570        0.01206770   0.008669440
## 16227     6   3600001 3650000        528        0.03079550   0.023766500
## 16228     6   3650001 3700000        701        0.02597620   0.020965100
## 16229     6   3700001 3750000        805        0.00338468   0.000400509
## 16230     6   3750001 3800000        830        0.02082790   0.017915200
## 16231     6   3800001 3850000        800        0.00589529   0.001372410
##       WEIGHTED_FST_AUUK MEAN_FST_AUUK WEIGHTED_FST_USAU MEAN_FST_USAU
## 16226         0.0367649     0.0305788         0.0392762     0.0368076
## 16227         0.0501026     0.0406660         0.0739191     0.0613789
## 16228         0.0711590     0.0593933         0.0454643     0.0368937
## 16229         0.0754966     0.0604820         0.0708642     0.0562380
## 16230         0.0375712     0.0322166         0.0438655     0.0358050
## 16231         0.1001270     0.0672706         0.0743417     0.0579428
##            PI_UK      PI_US      PI_AU TajimaD_UK TajimaD_US TajimaD_AU   SNP
## 16226 0.00366513 0.00384209 0.00372947   0.592114   0.972481   0.785959 16226
## 16227 0.00349155 0.00329177 0.00364804   0.682877   0.782069   0.892366 16227
## 16228 0.00468438 0.00458936 0.00478475   0.859767   0.784092   0.972961 16228
## 16229 0.00535166 0.00537373 0.00535428   0.706416   0.819633   0.873451 16229
## 16230 0.00555363 0.00537467 0.00539254   0.840503   0.753410   0.770190 16230
## 16231 0.00531258 0.00519576 0.00516348   0.695610   0.790285   0.812897 16231
##         piUK.piAU   piUK.piUS
## 16226 -0.00006434 -0.00017696
## 16227 -0.00015649  0.00019978
## 16228 -0.00010037  0.00009502
## 16229 -0.00000262 -0.00002207
## 16230  0.00016109  0.00017896
## 16231  0.00014910  0.00011682
chrom6.div.hifst <- chrom6.div[which(chrom6.div$SNP < 16281),]
chrom6.div.hifst <- chrom6.div.small[which(chrom6.div.small$SNP > 16263),]

# AUUK high fst: window 5350001 to 6300001 on Chrom 6
# "SNP" 16263 to 16281

mean(chrom6.div.hifst$PI_UK) 
## [1] 0.002873224
mean(chrom6.div.hifst$PI_US) 
## [1] 0.002896161
mean(chrom6.div.hifst$PI_AU) 
## [1] 0.00293987
chrom6.div.med <- chrom6.div[which(chrom6.div$SNP < 16450),]
chrom6.div.med <- chrom6.div.med[which(chrom6.div.med$SNP > 16155),]
quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_US, col="#2c81a8", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.04))
lines((chrom6.div.small$BIN_START), chrom6.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,2.4))
lines((chrom6.div.small$BIN_START), chrom6.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,2.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2
quartz(height=3,width=7)
options(scipen=999)
par(new=T)
## Warning in par(new = T): calling par(new=TRUE) with no plot
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.01,0.4))
lines((chrom6.div.med$BIN_START), chrom6.div.med$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.01,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome6.BroaderFSTaroundPeak.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 1

chrom1.div <- div[which(div$CHROM==1),]

manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom1.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom1.div.small <- chrom1.div[which(chrom1.div$SNP < 6700),]
chrom1.div.small <- chrom1.div.small[which(chrom1.div.small$SNP > 6400),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1.div.small$BIN_START), chrom1.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1.div.small$BIN_START), chrom1.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 1A

chrom1A.div <- div[which(div$CHROM==1.25),]
# 2896 to 4342
# 1446 windows, 3 ticks


manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom1A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom1A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

chrom1A.div.small <- chrom1A.div[which(chrom1A.div$SNP < 4000),]
chrom1A.div.small <- chrom1A.div.small[which(chrom1A.div.small$SNP > 3700),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom1A.div.small$BIN_START), chrom1A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome1A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 4

chrom4.div <- div[which(div$CHROM==4),]
# 13506 to 14923,
# 1417 windows, 7 ticks - peak ~500 windows from start 


manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom4.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4.div, chr = "CHROM", bp = "BIN_START", snp = "SNP", :
## You're trying to highlight SNPs that don't exist in your results.

chrom4.div.small <- chrom4.div[which(chrom4.div$SNP < 14150),]
chrom4.div.small <- chrom4.div.small[which(chrom4.div.small$SNP > 13850),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4.div.small$BIN_START), chrom4.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4.div.small$BIN_START), chrom4.div.small$TajimaD_UK, col="#39C855", lwd=1)

axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2

Chromosome 4A

chrom4A.div <- div[which(div$CHROM==4.5),]
# 13097 to 13505
# 408 windows, 4 ticks

manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_AUUK", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelAUpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

manhattan(chrom4A.div, chr="CHROM", bp="BIN_START", snp="SNP", p="WEIGHTED_FST_UKUS", 
          ylim=c(0,0.41),ylab=NA,xlab=NA,logp = FALSE,col=c("grey45","grey65"),
          highlight=SNP.novelUSpi.hifstonly,
          cex=1,cex.axis=1)
## Warning in manhattan(chrom4A.div, chr = "CHROM", bp = "BIN_START", snp =
## "SNP", : You're trying to highlight SNPs that don't exist in your results.

chrom4A.div.small <- chrom4A.div[which(chrom4A.div$SNP < 13300),]
chrom4A.div.small <- chrom4A.div.small[which(chrom4A.div.small$SNP > 13150),]

quartz(height=5,width=7)
options(scipen=999)
par(mfrow=c(2,1))
par(mar=c(0,2,0.5,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_AUUK, col="#F2C14E",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_UKUS, col="#2c81a8", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, type="n", bty="n", axes=FALSE,  ylim=c(-0.2,0.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$WEIGHTED_FST_USAU, col="grey50", lwd=1)
axis(side=2,ylim=c(-0.2,0.4))
abline(h=quantile(div$WEIGHTED_FST_UKUS,.99), col="#2c81a8", lwd=0.5)
abline(h=quantile(div$WEIGHTED_FST_AUUK,.99), col="#F2C14E", lwd=0.5)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, type="n", axes=FALSE, bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_AU, col="#F8DD9E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_US, col="#66A3C0", lwd=2)
axis(side=4, ylim=c(0,0.03))
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(0,0.03))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$PI_UK, col="#39C855", lwd=1)
par(mar=c(1,2,1,2))
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, type="n",axes=FALSE, bty="n", xlab=NA, ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_AU, col="#F2C14E", lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_US, col="#2c81a8",lwd=2)
par(new=T)
plot((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, type="n", axes=FALSE, xlab=NA, ylab=NA,  bty="n", ylim=c(-2.4,3.4))
lines((chrom4A.div.small$BIN_START), chrom4A.div.small$TajimaD_UK, col="#39C855", lwd=1)
axis(side=2, ylim=c(-2.4,3.4))
axis(side=1)

quartz.save("/Users/nataliehofmeister/Documents/Ch3-Global-RESEQ/analysis/R/manhattans_for_pub/Chromosome4A.ManhattanZoom.pdf", type="pdf")
## quartz_off_screen 
##                 2